# build serve
import os
from pprint import pprint
import time
from paddlenlp import Taskflow
from serve.ocr import ocr_one

# Field labels passed to the information-extraction pipeline as its schema.
schema = [
    "Title",
    "InvoiceType",
    "InvoiceCode",
    "IssueDate",
    "BuyerName",
    "BuyerTaxCode",
    "SellerName",
    "SellerTaxCode",
    "InvoiceClerk",
    "TaxInclusiveTotalAmount",
    "Remark",
]

# The pipeline is constructed once, when this module is imported, from the
# checkpoint directory given by ``task_path`` (relative to the working dir).
my_ie = Taskflow(
    "information_extraction",
    schema=schema,
    task_path="./checkpoint/model_best",
)



def ocr_nlp(path, filename):
    """Run OCR on a file and extract the schema fields from its text.

    ``path`` and ``filename`` are forwarded unchanged to ``ocr_one``; whatever
    it returns is passed to the module-level ``my_ie`` extractor.

    Returns:
        A dict mapping each field name present in the first extraction
        result to the text of that field's first match, with ASCII spaces
        removed. For the ``"Title"`` field, ASCII and full-width
        parentheses are removed as well. Fields with an empty match list
        are omitted, and an empty dict is returned when the extractor
        yields no result at all.
    """
    ocr_text = ocr_one(path, filename)
    results = my_ie(ocr_text)
    if not results:
        # Nothing was extracted; avoid indexing into an empty result list.
        return {}

    # A single translate pass deletes "(", ")", "（" and "）" from the title.
    strip_parens = str.maketrans("", "", "()（）")

    fpinfo = {}
    for key, matches in results[0].items():
        if not matches:
            continue
        # Only the first match per field is kept. Spaces are stripped from
        # the extracted value here rather than from the raw OCR text.
        value = matches[0]['text'].replace(" ", "")
        if key == "Title":
            value = value.translate(strip_parens)
        fpinfo[key] = value
    return fpinfo